# -*- coding: utf-8 -*-
import copy
from zc_core.spiders.base import BaseSpider
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.project import get_project_settings
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.done_filter import DoneFilter

from guotie.rules import *


class FullSpider(BaseSpider):
    """Full crawl spider for mall.95306.cn.

    For each SPU id obtained from ``SkuPoolDao`` the spider requests the item
    detail endpoint, then requests the price endpoint for the SKU ids parsed
    out of that response, and finally yields a ``Box`` holding the SKUs that
    ``parse_price`` (the module-level rule function) reports as done.
    """
    name = "full"
    # Item detail endpoint; the placeholder is filled with the SPU id.
    item_url = "https://mall.95306.cn/proxy/item/mall/search/queryNormalItemDetails?platformId=20&itemId={}&areaId=-1"
    # SKU price endpoint; the placeholder is filled with comma-separated SKU ids.
    price_url = "https://mall.95306.cn/proxy/item/mall/search/querySkuPrice?platformId=20&skuIds={}"

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider, create the batch record and the done-filter.

        :param batchNo: batch number forwarded to ``BaseSpider``; ``self.batch_no``
            is expected to be set by the base class (not visible here).
        """
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record.
        BatchDao().create_batch(self.batch_no)
        # Avoid re-crawling: filter keyed on spuId for this batch number.
        self.done_filter = DoneFilter(self.batch_no, fields={'spuId': 1}, filter_key='spuId')

    def _build_headers(self):
        """Return the HTTP headers shared by the detail and price requests.

        Reads ``self.authorization``, which ``start_requests`` sets before any
        request is yielded. A fresh dict is returned on every call so callers
        never share a mutable header mapping.
        """
        return {
            'Host': 'mall.95306.cn',
            'Connection': 'keep-alive',
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'X-Requested-With': 'XMLHttpRequest',
            'Authorization': self.authorization,
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3775.400 QQBrowser/10.6.4208.400',
            'Referer': 'https://mall.95306.cn/mall-view/',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.9',
        }

    def start_requests(self):
        """Yield one item-detail request per SPU id that still needs crawling.

        SPU ids come from ``SkuPoolDao``; when the ``CATALOG_WHITE_LIST``
        setting is non-empty it is used as an ``$or`` query to restrict them.
        SPUs already present in the done-filter are skipped unless the
        ``FORCE_RECOVER`` setting is truthy.
        """
        # NOTE(review): the session token is hard-coded, so the guard below can
        # never fire for this literal — presumably left over from a dynamic
        # login step. Consider loading the token from settings/secrets.
        cookies = {'st': '0e19cec81469c2f21d52d7d490ea7bba', 'AlteonPmall': '0a03b7f8ce46a8361f41'}
        if not cookies or 'st' not in cookies:
            self.logger.error('init cookie failed...')
            return
        self.authorization = cookies.get('st')
        # NOTE(review): this writes the authorization token to the log.
        self.logger.info('init cookie: %s', cookies)

        settings = get_project_settings()
        white_list = settings.get("CATALOG_WHITE_LIST")
        if white_list:
            spu_list = SkuPoolDao().get_distinct_spu_list(query={"$or": white_list})
        else:
            spu_list = SkuPoolDao().get_distinct_spu_list()

        # Loop-invariant: read the setting once instead of once per SPU.
        force_recover = settings.get('FORCE_RECOVER', False)

        self.logger.info('全量: %s', len(spu_list))
        for spu_id in spu_list:
            # Avoid re-crawling SPUs that are already done.
            if self.done_filter.contains(spu_id) and not force_recover:
                self.logger.info('已采: %s', spu_id)
                continue

            yield Request(
                url=self.item_url.format(spu_id),
                meta={
                    'reqType': 'full',
                    'batchNo': self.batch_no,
                    "spuId": spu_id,
                },
                headers=self._build_headers(),
                callback=self.parse_item_data,
                errback=self.error_back
            )

    def parse_item_data(self, response):
        """Parse an item-detail response and request prices for its SKUs.

        Yields nothing when the rule function returns an empty result.
        """
        spu_id = response.meta.get("spuId")
        # The bare name resolves to the module-level rule function (star-imported),
        # not to this method: class attributes are not in a method's scope.
        item_dict = parse_item_data(response)
        if not item_dict:
            return

        # Stringify keys so non-str SKU ids do not break the join.
        sku_ids = ','.join(map(str, item_dict.keys()))
        yield Request(
            url=self.price_url.format(sku_ids),
            meta={
                'reqType': 'price',
                'batchNo': self.batch_no,
                "spuId": spu_id,
                # Shallow copy so the request meta holds its own mapping.
                "itemDict": copy.copy(item_dict),
            },
            headers=self._build_headers(),
            callback=self.parse_price,
            errback=self.error_back
        )

    def parse_price(self, response):
        """Parse a price response; yield done SKUs and log the missing ones.

        Yields nothing when the response body is empty.
        """
        if not response.text:
            return

        meta = response.meta
        spu_id = meta.get("spuId")
        item_dict = meta.get("itemDict")
        # Module-level rule function (see the note in ``parse_item_data``).
        done_sku, miss_sku = parse_price(response)
        if done_sku:
            # ``or ()`` keeps the log line from raising when a value is None.
            self.logger.info(
                '商品: spu=%s, total=%s, done=%s, miss=%s',
                spu_id, len(item_dict or ()), len(done_sku), len(miss_sku or ()))
            yield Box('item', self.batch_no, done_sku)
        if miss_sku:
            self.logger.error('缺失: spu=%s, skus=%s', spu_id, miss_sku)


